#######################################################
### Code to create Figures 3-5, Appendix Tables 4-8 ###
#######################################################



################################################################################
### Setup 

# Clear workspace, call packages
rm(list=ls())
library(ggplot2)
library(dplyr)
library(stargazer)
library(sjPlot)
library(jtools)
library(lfe)
library(data.table)
library(lubridate)
library(Hmisc)
library(cowplot)
library(abmisc)



################################################################################
### Models predicting engagement

### Load and clean data

# Load tweets data, add identifying variables
tweets <- fread("activetweets111-116.csv", data.table=FALSE)
tweets <- tweets %>%
  mutate(year = str_extract(created_at, '^[0-9]{4}'),
         yearhandle = paste0(twitter_lower, '-', year)
         )

# Remove retweets
tweets <- tweets %>%
  filter(RT==0)

# Compute year-name like and retweet averages, merge to tweets
avgs <- tweets %>%
  group_by(yearhandle) %>%
  dplyr::summarize(
    likeavg = mean(public_metrics.like_count),
    retweetavg = mean(public_metrics.retweet_count)
  ) %>%
  ungroup()
tweets <- merge(tweets, avgs, by = "yearhandle")

# Compute difference between number of likes/retweets and year-name averages
tweets <- tweets %>%
  mutate(likediff = public_metrics.like_count - likeavg,
         retweetdiff = public_metrics.retweet_count - retweetavg)

# Convert difference to percent
tweets <- tweets %>%
  mutate(likepct = ((public_metrics.like_count - likeavg) / likeavg ) * 100,
         retweetpct = ((public_metrics.retweet_count - retweetavg) / retweetavg ) * 100)


### Run models
likes <- felm(likediff ~ polarizing | yearhandle + created_at, data = tweets)
rts <- felm(retweetdiff ~ polarizing | yearhandle + created_at, data = tweets)


### Produce Appendix Table 5
stargazer(likes, rts,
          Title = "Likes and Retweets above Average Predicted by Polarizing Rhetoric", 
          type="latex",
          covariate.labels = c("Polarizing"),
          column.labels = c("Difference from Average Likes", 
                            "Difference from Average Retweets")
          )


### Extract coefficients and errors

# Function to extract effects
effPlotDat <- function(model){
  # Pull coefficient
  coefs <- summary(model)$coefficients
  coef <- coefs['polarizing','Estimate']
  # Pull CI
  ci <- coefs['polarizing', 'Std. Error']*1.96
  # Combine/return
  effs <- c(coef=coef, ci=ci)
  effs
}

# Run function
effs <- sapply(list(likes, rts), function(x){
  effPlotDat(x)
}) %>% 
  t()


### Plot effects of polarizing rhetoric on engagement (Figure 3)
barCenters <- barplot(effs[,1], ylim=c(0, 600))
jpeg('engageMods_effects_polarizing.jpg', width=6, height=6, units='in', res=300)
par(mar=c(3.1, 4.1, 2.1, 1.1))
barplot(effs[,1], ylim=c(0, 600), ylab='Effect of Polarizing Rhetoric', 
        names.arg=c('Likes above\nAverage', 'Retweets above\nAverage'))
for(ii in 1:2){
  segments(x0=barCenters[ii], 
           y0=effs[ii,1]-effs[ii,2], 
           y1=effs[ii,1]+effs[ii,2], 
           lwd=3)
  arrows(x0=barCenters[ii], 
         y0=effs[ii,1]-effs[ii,2], 
         y1=effs[ii,1]+effs[ii,2], 
         lwd=3, angle=90, code=3, length=0.05)  
  text(x=barCenters[ii], 
       y=effs[ii,1]/2, 
       paste0(round(effs[ii,1], 1), ' +/- ', round(effs[ii,2], 1)))
}
par(mar=c(5.1, 4.1, 4.1, 2.1))
dev.off()



################################################################################
### Models predicting engagement separately by year

### Run models

# Create variables to help iterate through modeling by year
years <- 2010:2020
tweets %<>% mutate(years_num=num(year))

# Create empty lists to store models
likes_mods <- vector(mode='list', length=d(years))
rts_mods <- vector(mode='list', length=d(years))

# Run models, save in lists
for(ii in seq_along(years)){
  mod_df <- filter(tweets, years_num==years[ii])
  likes_mod <- felm(likepct ~ polarizing | yearhandle + created_at, data = mod_df)
  rts_mod <- felm(retweetpct ~ polarizing | yearhandle + created_at, data = mod_df)
  likes_mods[[ii]] <- likes_mod
  rts_mods[[ii]] <- rts_mod
}


### Create appendix tables 7 and 8
stargazer(likes_mods, font.size='tiny', keep.stat=c('n', 'adj.rsq'), 
          label='likeTabTime', covariate.labels='Uncivil',
          omit.table.layout='n', star.cutoffs=NA, dep.var.labels.include=FALSE, 
          column.labels=char(2010:2020), column.sep.width='0pt',
          title='OLS Models predicting the number of likes relative to the average tweet for each member, by year.') 
stargazer(rts_mods, font.size='tiny', keep.stat=c('n', 'adj.rsq'), 
          label='rtTabTime', covariate.labels='Uncivil',
          omit.table.layout='n', star.cutoffs=NA, dep.var.labels.include=FALSE, 
          column.labels=char(2010:2020), column.sep.width='0pt',
          title='OLS Models predicting the number of retweets relative to the average tweet for each member, by year.') 


### Format effects for plotting

# Pull out coefficients and CIs
effs_likes <- sapply(likes_mods, function(x){
  effPlotDat(x)
}) %>% 
  t()
effs_rts <- sapply(rts_mods, function(x){
  effPlotDat(x)
}) %>% 
  t()

# Add year and CI range values to effects data
effs_likes <- as.data.frame(effs_likes) %>%
  mutate(year=years,
         ci_lo=coef-ci,
         ci_hi=coef+ci) 
effs_rts <- as.data.frame(effs_rts) %>%
  mutate(year=years,
         ci_lo=coef-ci,
         ci_hi=coef+ci)


### Plot coefficients and standard errors (Figure 4)

# Likes
likes_plot <- ggplot(data=effs_likes, mapping=aes(year, coef)) + 
  geom_point(size=3) +
  geom_smooth(method='loess', se=FALSE, color='black', linetype='dashed') +
  geom_errorbar(aes(ymin=ci_lo, ymax=ci_hi), width=0.2, size=1) +
  ylab('Effect of Polarizing Rhetoric') +
  xlab('') +
  ggtitle('Likes above Average') +
  theme(plot.title = element_text(hjust = 0.5)) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
  theme(axis.text=element_text(size=12),
        axis.title=element_text(size=12)) +
  ylim(20, 120) +
  scale_x_continuous(breaks=seq(2010, 2020, 1)) +
  scale_y_continuous(breaks=seq(20, 120, 20)) +
  theme(panel.grid.minor.x = element_blank(), panel.grid.major.x=element_blank())

# Retweets
rts_plot <- ggplot(data=effs_rts, mapping=aes(year, coef)) + 
  geom_point(size=3) +
  geom_smooth(method='loess', se=FALSE, color='black', linetype='dashed') +
  geom_errorbar(aes(ymin=ci_lo, ymax=ci_hi), width=0.2, size=1) +
  ylab('Effect of Polarizing Rhetoric') +
  xlab('') +
  ggtitle('Retweets above Average') +
  theme(plot.title = element_text(hjust = 0.5)) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
  theme(axis.text=element_text(size=12),
        axis.title=element_text(size=12)) +
  ylim(20, 120) +
  scale_x_continuous(breaks=seq(2010, 2020, 1)) +
  scale_y_continuous(breaks=seq(20, 120, 20)) +
  theme(panel.grid.minor.x = element_blank(), panel.grid.major.x=element_blank())

# Plot together (Figure 4)
jpeg(file='engagement_effects_over_time.jpg', width=6, height=6, units='in', res=300)
cowplot::plot_grid(likes_plot, rts_plot, ncol=1)
dev.off()



################################################################################
#### Follower Analysis (Figure 5)

### Format data

# Load data
tweetsfollow <- read.csv("followerstweetsjoin.csv")

# Filter out outlier changes
tweetsfollow  <- filter(tweetsfollow, followerchange > -10000)
tweetsfollow  <- filter(tweetsfollow, icpsr.x != "NA")
tweetsfollow$divisivepp <- (tweetsfollow$divisive/tweetsfollow$tweets)*100
tweetsfollow <- filter(tweetsfollow, followerchange_pct > -20)
tweetsfollow <- filter(tweetsfollow, followerchange_pct < 50)


### Run models

# Raw change
weeklyfollowersdivisive <- felm(followerchange ~ divisive | icpsr.x + yearweek, data=tweetsfollow) 
weeklyfollowerstweets <- felm(followerchange ~ tweets | icpsr.x + yearweek, data=tweetsfollow) 
weeklyfollowerspct <- felm(followerchange ~ divisivepp | icpsr.x + yearweek, data=tweetsfollow)

# Percent change
weeklyfollowersdivisive2 <- felm(followerchange_pct ~ divisive | icpsr.x + yearweek, data=tweetsfollow)
weeklyfollowerstweets2 <- felm(followerchange_pct ~ tweets | icpsr.x + yearweek, data=tweetsfollow)
weeklyfollowerspct2 <- felm(followerchange_pct ~ divisivepp | icpsr.x + yearweek, data=tweetsfollow)


### Create Appendix Table 6
stargazer(weeklyfollowersdivisive, weeklyfollowerstweets, weeklyfollowerspct, 
          weeklyfollowersdivisive2, weeklyfollowerstweets2, weeklyfollowerspct2,
          Title = "Weekly Percentage Change in Twitter Followers Predicted by Twitter Activity", 
          type="latex",
          covariate.labels = c("Polarizing Tweets", "Total Tweets", "Pct. Polarizing"),
          dep.var.labels = c("Absolute Follower Change", "Pct. Change in Followers"))


### Extract coefficients and errors for plotting

# Put models into lists so we can iterate through 
raw_change <- list(weeklyfollowersdivisive, weeklyfollowerstweets, weeklyfollowerspct)
pct_change <- list(weeklyfollowersdivisive2, weeklyfollowerstweets2, weeklyfollowerspct2)

# Extract coefs and SEs
effPlotDat <- function(model){
  # Pull coefficient
  coefs <- summary(model)$coefficients
  coef <- coefs[,'Estimate']
  # Pull CI
  ci <- coefs[,'Std. Error']*1.96
  # Combine/return
  effs <- c(coef=coef, ci=ci)
  effs
}
effs_raw <- sapply(raw_change, effPlotDat) %>% t() %>% as.data.frame()
effs_pct <- sapply(pct_change, effPlotDat) %>% t() %>% as.data.frame()


### Plot effects (Figure 5)

# Raw change
barCenters <- barplot(effs_raw$coef, ylim=c(0, 40))
jpeg('Divisivefollowerchange.jpg', width=8, height=8, units='in', res=300)
par(mar=c(3.1, 4.1, 2.1, 1.1))
barplot(effs_raw$coef, ylim=c(0, 40), 
        ylab='Weekly Numerical Change in Twitter Followers', 
        names.arg=c('# Polarizing', '# Tweets', '% Polarizing')
        )
for(ii in 1:3){
  segments(x0=barCenters[ii], 
           y0=effs_raw$coef[ii]+effs_raw$ci[ii], 
           y1=effs_raw$coef[ii]-effs_raw$ci[ii], 
           lwd=3)
  arrows(x0=barCenters[ii], 
         y0=effs_raw$coef[ii]+effs_raw$ci[ii], 
         y1=effs_raw$coef[ii]-effs_raw$ci[ii], 
         lwd=3, angle=90, code=3, length=0.05)  
  text(x=barCenters[ii], 
       y=effs_raw$coef[ii]/2, 
       paste0(round(effs_raw$coef[ii], 2), ' +/- ', round(effs_raw$ci[ii], 2)))
}
par(mar=c(5.1, 4.1, 4.1, 2.1))
dev.off()

# Percent change
barCenters <- barplot(effs_pct$coef, ylim=c(0, 0.03))
jpeg('DivisivefollowerchangePct.jpg', width=8, height=8, units='in', res=300)
par(mar=c(3.1, 4.1, 2.1, 1.1))
barplot(effs_pct$coef, ylim=c(0, 0.03), 
        ylab='Weekly Percent Change in Twitter Followers', 
        names.arg=c('# Polarizing', '# Tweets', '% Polarizing')
        )
for(ii in 1:3){
  segments(x0=barCenters[ii], 
           y0=effs_pct$coef[ii]+effs_pct$ci[ii], 
           y1=effs_pct$coef[ii]-effs_pct$ci[ii], 
           lwd=3)
  arrows(x0=barCenters[ii], 
         y0=effs_pct$coef[ii]+effs_pct$ci[ii], 
         y1=effs_pct$coef[ii]-effs_pct$ci[ii], 
         lwd=3, angle=90, code=3, length=0.05)  
  text(x=barCenters[ii], 
       y=effs_pct$coef[ii]/2, 
       paste0(round(effs_pct$coef[ii], 3), ' +/- ', round(effs_pct$ci[ii], 3)))
}
par(mar=c(5.1, 4.1, 4.1, 2.1))
dev.off()